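# # -*- coding: utf-8 -*-
# NOTE(review): every line of this module is commented out, so the spider below is disabled.
# If it is re-enabled: the code calls `scrapy.FormRequest`, but only `Request` is imported
# from scrapy here — confirm whether `from chdtp.rules import *` provides `scrapy`.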
# import math
# from scrapy import Request
# from zc_core.util.batch_gen import time_to_batch_no
# from zc_core.util.http_util import retry_request
# from scrapy.exceptions import IgnoreRequest
# from zc_core.dao.catalog_dao import CatalogDao
# from zc_core.spiders.base import BaseSpider
# from chdtp.rules import *
#
#
# class SkuSpider(BaseSpider):
#     name = "sku_catalog"
#     custom_settings = {
#         'CONCURRENT_REQUESTS': 12,
#         'DOWNLOAD_DELAY': 0.5,
#         'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
#         'CONCURRENT_REQUESTS_PER_IP': 12,
#     }
#
#     sku_url = 'https://www.chdtp.com/hdsc/wzgl/searchCpzsAction.action'
#     # Office category URL
#     catalog_office_url = 'https://www.chdtp.com/hdsc/wzgl/inputFlCpzsAction.action?spxx.fl=EDB367F1AF25FB13DFD25F8A605B00C281F5B3F2FA0B02D2&spxx.flbh=000001'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
#         self.page_size = 60
#         self.max_page_limit = math.ceil(9960 / self.page_size)
#         self.supplier_list = []
#
#     def _build_list_req(self, callback, page, flbh):
#         return scrapy.FormRequest(
#             url=self.sku_url,
#             method="POST",
#             meta={
#                 'reqType': 'sku',
#                 'page': page,
#                 'batchNo': self.batch_no,
#                 'flbh': flbh,
#                 'catalog3Id': flbh
#             },
#             headers={
#                 'Content-Type': 'application/x-www-form-urlencoded'
#             },
#             formdata={
#                 'spxx.pp': '',
#                 'selectorder': '',
#                 'spxx.fl': '30758',
#                 'spxx.flbh': flbh,
#                 'selectkey': '',
#                 'spxx.fromxsjg': '',
#                 'spxx.endxsjg': '',
#                 'spxx.searchstr': '',
#                 'mapstr': '',
#                 'page.pageSize': str(self.page_size),
#                 'page.currentpage': str(page),
#                 'searchmeifids': '47885,100471,100924'
#             },
#             callback=callback,
#             errback=self.error_back,
#         )
#
#     def start_requests(self):
#         yield Request(
#             url=self.catalog_office_url,
#             meta={
#                 'reqType': 'catalog',
#                 'batchNo': self.batch_no
#             },
#             callback=self.parse_office_catalog,
#             errback=self.error_back,
#         )
#
#     def parse_office_catalog(self, response):
#         cats = parse_office_catalog(response)
#         if cats:
#             self.logger.info('品类: count[%s]' % len(cats))
#             yield Box('catalog', self.batch_no, cats)
#
#             for cat in cats:
#                 if cat.get('level') == 3:
#                     # Collect the SKU list
#                     catalog3Id = cat.get('catalogId')
#                     yield self._build_list_req(self.parse_sku_list, 1, catalog3Id)
#
#
#     def parse_sku_list(self, response):
#         meta = response.meta
#         cur_page = meta.get("page")
#         catalog3_id = meta.get('catalog3Id')
#         sku_list = parse_sku_list(response)
#         if sku_list:
#             self.logger.info("清单1: catalog=%s, cnt=%s" % (catalog3_id, len(sku_list)))
#             yield Box("sku", self.batch_no, sku_list)
#             # Subsequent pages
#             if cur_page == 1:
#                 total_page = parse_total_page(response, self.page_size)
#                 self.logger.info("总页数: catalog=%s, total=%s" % (catalog3_id, total_page))
#                 if total_page < self.max_page_limit:
#                     for page in range(2, total_page + 1):
#                         yield self._build_list_req(self.parse_sku_list, page, catalog3_id)
#                 else:
#                     self.logger.info("超限1: catalog=%s, total=%s" % (catalog3_id, total_page))
#         else:
#             self.logger.info("无商品: catalog=%s, page=%s" % (catalog3_id, cur_page))
